rm(list=ls())

### R package
# install the spsR package from naoki-egami/spsR. For reproducibility, we specify the exact version.
library(devtools)
install_github("naoki-egami/spsR@1532c8b5445ed282f203410bbad76345c69d71bf", 
               dependencies = TRUE)
library(spsR)


### Load data 
load("data/sps_data.rdata")

# Standardize Covariates
cov_use_id <- 2:ncol(data_use)
X_use <- as.matrix(data_use[, cov_use_id])
scale_no_ind <- c(6:10)
for(i in 1:ncol(X_use)){
  if((i %in% scale_no_ind) == FALSE){
    X_use[,i] <- scale(X_use[,i])
  }
}
cov_name <- c("GDP", "Polity", "Eth_Frac", "Dist_to_China", "Dist_to_US",
              "subreg_EA", "subreg_MA", "subreg_SA", "subreg_WA", "CR")
rownames(X_use) <- data_use$country
colnames(X_use) <- cov_name

## Stratified SPS
# Stratify each column
cov_use <- cov_name[- scale_no_ind]
st_large <- st_small <- list()
for(v in 1:length(cov_use)){
st_large[[v]] <- stratify_sps(X = X_use, 
                              num_site = list("at least", 1), 
                              condition = list(cov_use[v], "larger than or equal to", 0.5))
st_small[[v]] <- stratify_sps(X = X_use, 
                              num_site = list("at least", 1), 
                              condition = list(cov_use[v], "smaller than or equal to", -0.5))
}

# Region 
C_r_EA  <- stratify_sps(X_use, num_site = list("at least", 1), condition = list("subreg_EA", "larger than or equal to", 0.8))
C_r_MA  <- stratify_sps(X_use, num_site = list("at least", 1), condition = list("subreg_MA", "larger than or equal to", 0.8))
C_r_SA  <- stratify_sps(X_use, num_site = list("at least", 1), condition = list("subreg_SA", "larger than or equal to", 0.8))
C_r_WA  <- stratify_sps(X_use, num_site = list("at least", 1), condition = list("subreg_WA", "larger than or equal to", 0.8))

# CR
C_r_CR  <- stratify_sps(X_use, num_site = list("at least", 5), condition = list("CR", "larger than or equal to", 0.8))

st_combined <- c(st_large, st_small, list(C_r_EA, C_r_MA, C_r_SA, C_r_WA, C_r_CR))

### Running Synthetic Purposive Sampling
out <- sps(X = X_use, N_s = 5, stratify = st_combined, seed = 1)
save(out, file = "sps_out_pica2_final.rdata")

### Selected countries
out$selected_sites

### Check Diversity
colnames(out$internal$X)[1:5] <- c("GDP","Polity","Ethnic Frac.", "Dist. to China","Dist. to US")
pdf("figures/Figure_F1.pdf", height = 6, width = 6)
sps_plot(out, columns = colnames(out$internal$X)[1:5])
dev.off()
